In [1]:
import os
import sys
# dirname(__name__) is empty inside a notebook, so this resolves to the current working directory
CURRENT_DIR = os.path.abspath(os.path.dirname(__name__))
CNN_EXAMPLE_FILES = os.path.join(CURRENT_DIR, '..', 'examples', 'cnn')
VGG19_WEIGHTS_FILE = os.path.join(CNN_EXAMPLE_FILES, 'files', 'vgg19.hdf5')
IMAGE_DIR = os.path.join(CURRENT_DIR, 'caltech_101_images')
sys.path.append(CNN_EXAMPLE_FILES)
In [2]:
os.listdir(IMAGE_DIR)[:10]
Out[2]:
In [3]:
beaver_images = os.listdir(os.path.join(IMAGE_DIR, 'beaver'))
beaver_images[:10]
Out[3]:
In [4]:
image_classes = [
'beaver',
'cougar_body',
'gerenuk',
'kangaroo',
'leopards',
'llama',
'okapi',
'platypus',
'wild_cat',
]
In [5]:
import random
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
%matplotlib inline
random.seed(0)
images = []
index = 1
fig = plt.figure(figsize=(12, 9))
for name in image_classes:
    path = os.path.join(IMAGE_DIR, name)

    if index == 10:
        break

    if os.path.isdir(path):
        image_name = random.choice(os.listdir(path))
        image_path = os.path.join(path, image_name)
        image = mpimg.imread(image_path)

        plt.subplot(3, 3, index)
        plt.title(name.capitalize().replace('_', ' '))
        plt.imshow(image)
        plt.axis('off')

        index += 1

fig.tight_layout()
In [6]:
# also requires the requests and tqdm modules
from imagenet_tools import download_file, load_image, deprocess
In [7]:
from neupy import architectures
vgg19 = architectures.vgg19()
vgg19
Out[7]:
In [8]:
import os
from neupy import storage
if not os.path.exists(VGG19_WEIGHTS_FILE):
    download_file(
        url="http://neupy.s3.amazonaws.com/tensorflow/imagenet-models/vgg19.hdf5",
        filepath=VGG19_WEIGHTS_FILE,
        description='Downloading weights')
storage.load(vgg19, VGG19_WEIGHTS_FILE)
In [9]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm
%matplotlib inline
images = []
image_paths = []
for name in tqdm(image_classes):
    path = os.path.join(IMAGE_DIR, name)

    for image_name in os.listdir(path):
        image_path = os.path.join(path, image_name)
        image = load_image(
            image_path,
            image_size=(224, 224),
            crop_size=(224, 224))

        images.append(image)
        image_paths.append(image_path)

images = np.concatenate(images, axis=0)
image_paths = np.array(image_paths)
images.shape
Out[9]:
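A quick sanity check on the batch (a sketch added here, not part of the original notebook; it assumes load_image returns one (1, 224, 224, 3) array per image, which is consistent with the np.concatenate(..., axis=0) call above):
In [ ]:
# Sketch only: assumes each load_image call yields a (1, 224, 224, 3) array.
n_images = len(image_paths)
assert images.shape == (n_images, 224, 224, 3)
print(images.shape, images.dtype)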
In [10]:
# Note: it's important to cut the network at a dense layer, because SOFM expects vectors as input
dense_2 = vgg19.end('dense_2')
batch_size = 16
outputs = []
for batch in tqdm(range(0, len(images), batch_size)):
    output = dense_2.predict(images[batch:batch + batch_size])
    outputs.append(output)
dense_2_output = np.concatenate(outputs, axis=0)
dense_2_output.shape
Out[10]:
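The comment at the top of the previous cell is the key point: SOFM works on flat feature vectors, not on 4-d image tensors, which is why the network is cut at dense_2. Assuming the standard VGG19 layout, dense_2 is the second 4096-unit fully connected layer, so each image becomes a single 4096-dimensional vector (a quick check, added as a sketch):
In [ ]:
# Sketch: each image should now be a flat feature vector,
# 4096-dimensional if dense_2 is the usual VGG19 fc layer.
assert dense_2_output.ndim == 2
print(dense_2_output.shape)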
In [12]:
from neupy import algorithms, utils
utils.reproducible()
sofm = algorithms.SOFM(
    n_inputs=dense_2_output.shape[1],

    # The feature map grid is two-dimensional and has
    # 400 output clusters (20 * 20).
    features_grid=(20, 20),

    # The closest (winning) neuron is found
    # using cosine similarity.
    distance='cos',

    # Sample weights from the data.
    # Every weight vector will be just a sample
    # from the input data. In this way we can
    # ensure that the initialized map covers the
    # data from the very beginning.
    weight='sample_from_data',

    # Defines the radius within which neurons are
    # treated as neighbours of the winning neuron.
    learning_radius=5,

    # A large radius is useful only for the first
    # iterations, which is why we reduce it by 1
    # every 5 epochs.
    reduce_radius_after=5,

    # The further a neighbour is from the winning
    # neuron, the smaller its learning rate. How much
    # smaller is controlled by the `std` parameter:
    # the smaller the `std`, the smaller the learning
    # rate for neighbouring neurons.
    std=0.1,

    # `std` is reduced every 5 epochs.
    reduce_std_after=5,

    # Learning rate.
    step=0.001,

    # The learning rate is reduced every 5 epochs.
    reduce_step_after=5,

    # Show training progress in the terminal.
    verbose=True,

    # Shuffle data samples before every training epoch.
    shuffle_data=True,
)
sofm.train(dense_2_output, epochs=32)
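With reduce_radius_after=5, reduce_std_after=5 and reduce_step_after=5, the learning radius, std and learning rate all shrink every 5 epochs, so later epochs make increasingly local updates. After training, the winning grid cell for any image can be read off the one-hot prediction, the same way draw_grid does below (a small sketch that only uses calls already present in this notebook):
In [ ]:
# Winning neuron index for every image (one-hot prediction -> flat index).
winners = sofm.predict(dense_2_output).argmax(axis=1)

# Convert flat indices into (row, col) positions on the 20x20 feature map.
rows, cols = np.unravel_index(winners, sofm.features_grid)
print(image_paths[0], (rows[0], cols[0]))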
In [13]:
from __future__ import division

import matplotlib.gridspec as gridspec


def draw_grid(sofm, images, output_features):
    data = images
    clusters = sofm.predict(output_features).argmax(axis=1)
    grid_height, grid_width = sofm.features_grid

    plt.figure(figsize=(16, 16))
    grid = gridspec.GridSpec(grid_height, grid_width)
    grid.update(wspace=0, hspace=0)

    for row_id in range(grid_height):
        print("Progress: {:.2%}".format(row_id / grid_height))

        for col_id in range(grid_width):
            index = row_id * grid_width + col_id
            clustered_samples = data[clusters == index]

            if len(clustered_samples) > 0:
                # We take the first sample, but it could be any
                # sample from this cluster (a random one, or the
                # one closest to the cluster centre).
                sample = deprocess(clustered_samples[0])
            else:
                # If the cluster has no samples, it means
                # there is a gap in the feature space.
                sample = np.zeros((224, 224, 3))

            plt.subplot(grid[index])
            plt.imshow(sample)
            plt.axis('off')

    print("Progress: 100%")
    return sample
In [14]:
sample = draw_grid(sofm, images, dense_2_output)
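The comment inside draw_grid points out that any member of a cluster could be shown, not just the first one. A small variation (a sketch, not part of the original notebook) that displays a random member of one chosen cluster:
In [ ]:
# Sketch: show a random image from cluster 0 instead of its first member.
rng = np.random.RandomState(0)
clusters = sofm.predict(dense_2_output).argmax(axis=1)
members = np.where(clusters == 0)[0]

if len(members) > 0:
    plt.figure()
    plt.imshow(deprocess(images[rng.choice(members)]))
    plt.axis('off')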